{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import random\n",
    "import pandas as pd\n",
    "import re\n",
    "import jieba\n",
    "import matplotlib.pyplot as plt\n",
    "from functools import reduce\n",
    "from operator import add, mul\n",
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Toy sentence grammar, one rule per line in the form `name => alt | alt`.\n",
    "# Symbols that have no rule of their own are emitted literally by generate();\n",
    "# the symbol `null` expands to nothing.\n",
    "simple_grammar = \"\"\"\n",
    "sentence => noun_phrase verb_phrase\n",
    "noun_phrase => Article Adj* noun\n",
    "Adj* => null | Adj Adj*\n",
    "verb_phrase => verb noun_phrase\n",
    "Article =>  一个 | 这个\n",
    "noun =>   女人 | 篮球 | 桌子 | 小猫\n",
    "verb => 看着 | 坐在 | 听着 | 看见\n",
    "Adj =>  蓝色的 | 好看的 | 小小的\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def adj():\n",
    "    \"\"\"Return one adjective chosen at random from the hard-coded list.\"\"\"\n",
    "    options = '蓝色的 | 好看的 | 小小的'.split('|')\n",
    "    picked = random.choice(options)\n",
    "    # split()[0] removes the spaces that surrounded the '|' separators.\n",
    "    return picked.split()[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'小小的'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Draw one random adjective.\n",
    "adj()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def adj_star():\n",
    "    \"\"\"Expand the rule `Adj* => null | Adj Adj*` by hand.\n",
    "\n",
    "    With equal probability either stops (empty string) or emits one\n",
    "    adjective followed by another recursive expansion.\n",
    "    \"\"\"\n",
    "    def stop():\n",
    "        return ''\n",
    "\n",
    "    def extend():\n",
    "        return adj() + adj_star()\n",
    "\n",
    "    # Choose between the two alternatives first, then evaluate only the chosen one.\n",
    "    branch = random.choice([stop, extend])\n",
    "    return branch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'好看的小小的蓝色的'"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Draw a random, possibly empty, run of adjectives.\n",
    "adj_star()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Only the adjective rules of the sentence grammar; a small input for trying out create_grammar().\n",
    "adj_grammar = \"\"\"\n",
    "Adj* => null | Adj Adj*\n",
    "Adj =>  蓝色的 | 好看的 | 小小的\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_grammar(grammar_str, split='=>', line_split='\\n'):\n",
    "    \"\"\"Parse a textual grammar into a dict mapping each rule name to its alternatives.\n",
    "\n",
    "    Each non-blank line has the form `name <split> alt | alt | ...`; every\n",
    "    alternative is stored as the list of its whitespace-separated symbols.\n",
    "\n",
    "    Args:\n",
    "        grammar_str: the grammar text, one rule per line.\n",
    "        split: separator between a rule name and its right-hand side.\n",
    "        line_split: separator between rules.\n",
    "\n",
    "    Returns:\n",
    "        dict of the form {name: [[symbol, ...], ...]}.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if a non-blank line does not contain `split`.\n",
    "    \"\"\"\n",
    "    grammar = {}\n",
    "    for line in grammar_str.split(line_split):\n",
    "        if not line.strip(): continue\n",
    "        # Split on the first separator only, so the right-hand side may contain\n",
    "        # the separator itself (e.g. a literal '=' when split='=').\n",
    "        exp, stmt = line.split(split, 1)\n",
    "        grammar[exp.strip()] = [s.split() for s in stmt.split('|')]\n",
    "    return grammar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Adj*': [['null'], ['Adj', 'Adj*']], 'Adj': [['蓝色的'], ['好看的'], ['小小的']]}"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Parse the small adjective grammar to inspect the resulting dict.\n",
    "create_grammar(adj_grammar)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Parse the full sentence grammar (default '=>' separator).\n",
    "example_grammar = create_grammar(simple_grammar)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'sentence': [['noun_phrase', 'verb_phrase']],\n",
       " 'noun_phrase': [['Article', 'Adj*', 'noun']],\n",
       " 'Adj*': [['null'], ['Adj', 'Adj*']],\n",
       " 'verb_phrase': [['verb', 'noun_phrase']],\n",
       " 'Article': [['一个'], ['这个']],\n",
       " 'noun': [['女人'], ['篮球'], ['桌子'], ['小猫']],\n",
       " 'verb': [['看着'], ['坐在'], ['听着'], ['看见']],\n",
       " 'Adj': [['蓝色的'], ['好看的'], ['小小的']]}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the parsed rules.\n",
    "example_grammar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate(gram, target):\n",
    "    \"\"\"Randomly expand `target` into a string using the rules in `gram`.\n",
    "\n",
    "    A symbol with no rule in `gram` is returned unchanged. Otherwise one\n",
    "    alternative is picked with random.choice and each of its symbols is\n",
    "    expanded recursively; expansions equal to 'null' are dropped and\n",
    "    expansions equal to '/n' become a newline before everything is joined.\n",
    "    \"\"\"\n",
    "    if target not in gram:\n",
    "        return target\n",
    "    alternative = random.choice(gram[target])\n",
    "    pieces = []\n",
    "    for symbol in alternative:\n",
    "        text = generate(gram, symbol)\n",
    "        if text == 'null':\n",
    "            continue\n",
    "        pieces.append('\\n' if text == '/n' else text)\n",
    "    return ''.join(pieces)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'一个好看的桌子听着这个好看的蓝色的好看的小猫'"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Generate one random sentence from the parsed grammar.\n",
    "generate(example_grammar, target='sentence')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# In Westworld, the language of a 'human' can be defined as:\n",
    "human = \"\"\"\n",
    "human = 自己 寻找 活动\n",
    "自己 = 我 | 俺 | 我们 \n",
    "寻找 = 找找 | 想找点 \n",
    "活动 = 乐子 | 玩的\n",
    "\"\"\"\n",
    "\n",
    "# The language of a 'host' (receptionist) can be defined as:\n",
    "host = \"\"\"\n",
    "host = 寒暄 报数 询问 业务相关 结尾 \n",
    "报数 = 我是 数字 号 ,\n",
    "数字 = 单个数字 | 数字 单个数字 \n",
    "单个数字 = 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 \n",
    "寒暄 = 称谓 打招呼 | 打招呼\n",
    "称谓 = 人称 ,\n",
    "人称 = 先生 | 女士 | 小朋友\n",
    "打招呼 = 你好 | 您好 \n",
    "询问 = 请问你要 | 您需要\n",
    "业务相关 = 玩玩 具体业务\n",
    "玩玩 = null\n",
    "具体业务 = 喝酒 | 打牌 | 打猎 | 赌博\n",
    "结尾 = 吗？\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'我们找找乐子'"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The human grammar separates rule names with '=' rather than '=>'.\n",
    "generate(create_grammar(human, split='='), target='human')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "您好我是2号,请问你要赌博吗？\n",
      "您好我是93号,您需要喝酒吗？\n",
      "女士,您好我是1215179号,您需要打猎吗？\n",
      "先生,您好我是5号,您需要打猎吗？\n",
      "您好我是5558号,请问你要赌博吗？\n",
      "先生,你好我是44号,您需要赌博吗？\n",
      "您好我是57号,您需要赌博吗？\n",
      "先生,你好我是52号,请问你要喝酒吗？\n",
      "你好我是7号,请问你要打牌吗？\n",
      "您好我是5号,请问你要打猎吗？\n",
      "你好我是9号,您需要赌博吗？\n",
      "小朋友,您好我是35号,请问你要打猎吗？\n",
      "小朋友,您好我是4号,请问你要赌博吗？\n",
      "小朋友,你好我是9号,您需要赌博吗？\n",
      "您好我是6号,您需要赌博吗？\n",
      "您好我是5号,您需要打牌吗？\n",
      "你好我是2号,您需要打牌吗？\n",
      "您好我是8号,请问你要打牌吗？\n",
      "您好我是51号,您需要喝酒吗？\n",
      "女士,您好我是61号,您需要打猎吗？\n"
     ]
    }
   ],
   "source": [
    "# Parse the grammar once: it does not change between samples.\n",
    "host_grammar = create_grammar(host, split='=')\n",
    "# Print 20 random host utterances.\n",
    "for i in range(20):\n",
    "    print(generate(gram=host_grammar, target='host'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data-driven"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Toy grammar for a small C-like language, using '=>' as the rule separator.\n",
    "# '/n' is the token that generate() turns into a newline; '....' has no rule and is emitted literally.\n",
    "programming = \"\"\"\n",
    "stmt => if_exp | while_exp | assignment \n",
    "assignment => var = var\n",
    "if_exp => if ( var ) { /n .... stmt }\n",
    "while_exp => while ( var ) { /n .... stmt }\n",
    "var => chars number\n",
    "chars => char | char char\n",
    "char => student | name | info  | database | course\n",
    "number => 1 | 2 | 3\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "if(infoname1){\n",
      "....if(studentdatabase2){\n",
      "....database1=name1}}\n"
     ]
    }
   ],
   "source": [
    "# Print one random statement; print() renders the newlines produced from '/n'.\n",
    "print(generate(gram=create_grammar(programming, split='=>'), target='stmt'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: machine-specific absolute path; point this at your local copy of the corpus.\n",
    "# Raw string so the backslashes are taken literally ('\\G' is an invalid escape in a plain literal).\n",
    "filename = r'D:\\Github\\article_9k.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "33425826\n"
     ]
    }
   ],
   "source": [
    "# Load the whole corpus into memory, then report its length in characters.\n",
    "with open(filename, encoding='utf-8') as f:\n",
    "    articles = f.read()\n",
    "print(len(articles))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "def token(string):\n",
    "    \"\"\"Return every maximal run of word characters found in `string`, in order.\"\"\"\n",
    "    # Raw string: '\\w' in a plain literal is an invalid escape sequence.\n",
    "    return re.findall(r'\\w+', string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cut(string):\n",
    "    \"\"\"Segment `string` with jieba.cut and return the pieces as a list.\"\"\"\n",
    "    segments = jieba.cut(string)\n",
    "    return list(segments)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flat list that accumulates the tokens cut from the corpus.\n",
    "TOKEN = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tokenise the corpus line by line, stopping after line index 100000.\n",
    "# The context manager closes the file; a bare open() inside enumerate() never did.\n",
    "with open(filename, encoding='utf-8') as corpus:\n",
    "    for i, line in enumerate(corpus):\n",
    "        if i > 100000: break\n",
    "        TOKEN += cut(line)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "15"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# reduce(add, xs) folds xs with +, so for numbers it computes their sum.\n",
    "reduce(add, [1, 2, 3, 4, 5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1, 2, 3, 3, 43, 5]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# + on two lists concatenates them.\n",
    "[1, 2, 3] + [3, 43, 5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count how often each token occurs.\n",
    "words_count = Counter(TOKEN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('的', 727467),\n",
       " ('n', 382020),\n",
       " ('在', 269758),\n",
       " ('月', 191329),\n",
       " ('日', 167651),\n",
       " ('新华社', 142558),\n",
       " ('和', 137532),\n",
       " ('了', 127415),\n",
       " ('年', 124717),\n",
       " ('是', 104902),\n",
       " ('\\n', 90612),\n",
       " ('１', 88380),\n",
       " ('０', 85191),\n",
       " ('外代', 83268),\n",
       " ('中', 75258),\n",
       " ('中国', 72415),\n",
       " ('２', 70714),\n",
       " ('2017', 70148),\n",
       " ('将', 63166),\n",
       " ('记者', 62871),\n",
       " ('二线', 62019),\n",
       " ('与', 59590),\n",
       " ('等', 59585),\n",
       " ('为', 58406),\n",
       " ('5', 55139),\n",
       " ('照片', 52329),\n",
       " ('4', 52038),\n",
       " ('对', 51760),\n",
       " ('也', 49486),\n",
       " ('上', 48759),\n",
       " ('有', 47769),\n",
       " ('５', 40949),\n",
       " ('说', 39928),\n",
       " ('他', 38813),\n",
       " ('发展', 38252),\n",
       " ('以', 37489),\n",
       " ('３', 37058),\n",
       " ('国际', 36160),\n",
       " ('nn', 35330),\n",
       " ('４', 34717),\n",
       " ('比赛', 32533),\n",
       " ('到', 31281),\n",
       " ('人', 30666),\n",
       " ('６', 30666),\n",
       " ('从', 30440),\n",
       " ('6', 30232),\n",
       " ('都', 29444),\n",
       " ('不', 29277),\n",
       " ('后', 28346),\n",
       " ('就', 27961),\n",
       " ('并', 27467),\n",
       " ('当日', 27214),\n",
       " ('国家', 26804),\n",
       " ('进行', 26672),\n",
       " ('企业', 26632),\n",
       " ('被', 26526),\n",
       " ('７', 26479),\n",
       " ('3', 25974),\n",
       " ('美国', 25894),\n",
       " ('北京', 25715),\n",
       " ('举行', 25497),\n",
       " ('这', 25187),\n",
       " ('体育', 24938),\n",
       " ('2', 24756),\n",
       " ('1', 24651),\n",
       " ('但', 24436),\n",
       " ('新', 24286),\n",
       " ('个', 23738),\n",
       " ('比', 23453),\n",
       " ('我', 23201),\n",
       " ('一个', 22916),\n",
       " ('表示', 22864),\n",
       " ('还', 22780),\n",
       " ('足球', 22654),\n",
       " ('经济', 22363),\n",
       " ('９', 21959),\n",
       " ('合作', 21817),\n",
       " ('要', 21728),\n",
       " ('已', 21296),\n",
       " ('工作', 21164),\n",
       " ('n5', 20946),\n",
       " ('我们', 20877),\n",
       " ('摄', 20871),\n",
       " ('８', 20752),\n",
       " ('n4', 20658),\n",
       " ('选手', 20040),\n",
       " ('市场', 19768),\n",
       " ('让', 19326),\n",
       " ('一路', 19073),\n",
       " ('一带', 18974),\n",
       " ('建设', 18857),\n",
       " ('通过', 18640),\n",
       " ('日电', 18541),\n",
       " ('时', 18375),\n",
       " ('多', 18281),\n",
       " ('会', 17986),\n",
       " ('而', 17892),\n",
       " ('更', 17842),\n",
       " ('于', 17794),\n",
       " ('问题', 17791)]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The 100 most frequent tokens with their counts, most frequent first.\n",
    "words_count.most_common(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the counts of the 100 most common tokens, most frequent first.\n",
    "frequiences = [count for _word, count in words_count.most_common(100)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ranks 0..99, one per entry of the top-100 frequency list.\n",
    "x = list(range(100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x20ed52bd5c0>]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAD8CAYAAACLrvgBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3XuQnNV95vHvry/TPTOa0egyI4NGYgQewBiH2xgrZpfEEAuBXYjaMrs42SATtpR4cdZZxxvjJFVU7LBrVyVxTMXWLuFiEXtNCL6gOGBZK9tJbANmMLYECCIhLhoka0a30WU0l+7+7R/v6ZlWq28jZtTSzPOp6uru0+d9z2la6NE573nf19wdERGRWsTq3QERETlzKDRERKRmCg0REamZQkNERGqm0BARkZopNEREpGYKDRERqZlCQ0REaqbQEBGRmiXq3YGptnDhQu/q6qp3N0REzijPPvvsXndvr1ZvxoVGV1cXvb299e6GiMgZxcxer6WepqdERKRmCg0REamZQkNERGqm0BARkZopNEREpGYKDRERqZlCQ0REaqbQCL71XB9ffaqmZcoiIrOWQiP4x1/s5uFn3qh3N0RETmsKjSCdjDEylqt3N0RETmsKjSCViDOSUWiIiFSi0AhSiRjDY9l6d0NE5LRWNTTM7AIz+3nB45CZ/YGZzTezjWa2LTzPC/XNzO4xs+1mttnMLi/Y1+pQf5uZrS4ov8LMtoRt7jEzC+Ul25gOqURMIw0RkSqqhoa7v+zul7r7pcAVwBDwLeBOYJO7dwObwnuA64Hu8FgDrIUoAIC7gPcAVwJ3FYTA2lA3v93KUF6ujSmXSsYZyWikISJSyWSnp64FXnH314FVwLpQvg64KbxeBTzkkaeANjM7C7gO2Oju+939ALARWBk+a3X3J93dgYeK9lWqjSmXH2lEXRARkVImGxq3AF8Prxe5+26A8NwRyhcDOwu26Qtllcr7SpRXamPKpZNx3GEsq9AQESmn5tAwswbgRuAfqlUtUeYnUV4zM1tjZr1m1jswMDCZTcelEtF/Ck1RiYiUN5mRxvXAz9x9T3i/J0wtEZ77Q3kfsKRgu05gV5XyzhLlldo4jrvf6+497t7T3l71boUl5UNjWOdqiIiUNZnQ+DATU1MA64H8CqjVwGMF5beGVVTLgcEwtbQBWGFm88IB8BXAhvDZYTNbHlZN3Vq0r1JtTLlUIg5opCEiUklN9wg3sybg/cDvFhR/DnjEzG4H3gBuDuWPAzcA24lWWt0G4O77zeyzwDOh3mfcfX94/VHgK0Aj8ER4VGpjyqWS+ekpjTRERMqpKTTcfQhYUFS2j2g1VXFdB+4os58HgAdKlPcCF5coL9nGdBgfaWh6SkSkLJ0RHkyMNDQ9JSJSjkIj0IFwEZHqFBqBDoSLiFSn0AgmztPQSENEpByFRpDW6ikRkaoUGsHE6ilNT4mIlKPQCHSehohIdQqNID/S0I2YRETKU2gEOhAuIlKdQiNQaIiIVKfQCMyMhkRM52mIiFSg0CiQTsR07SkRkQoUGgV0n3ARkcoUGgVSGmmIiFSk0CiQSsR0IFxEpAKFRoFUQtNTIiKVKDQKpJMaaYiIVKLQKJBKxHVMQ0SkAoVGgVQyxrCmp0REyqopNMyszcweNbOXzGyrmf2qmc03s41mti08zwt1zczuMbPtZrbZzC4v2M/qUH+bma0uKL/CzLaEbe4xMwvlJduYLlo9JSJSWa0jjS8C33X3C4FLgK3AncAmd+8GNoX3ANcD3eGxBlgLUQAAdwHvAa4E7ioIgbWhbn67laG8XBvTQgfCRUQqqxoaZtYKXA3cD+Duo+5+EFgFrAvV1gE3hdergIc88hTQZmZnAdcBG919v7sfADYCK8Nnre7+pLs78FDRvkq1MS205FZEpLJaRhrnAgPAg2b2nJndZ2bNwCJ33w0QnjtC/cXAzoLt+0JZpfK+
EuVUaGNapJNxhYaISAW1hEYCuBxY6+6XAUepPE1kJcr8JMprZmZrzKzXzHoHBgYms+lxUomY7qchIlJBLaHRB/S5+9Ph/aNEIbInTC0RnvsL6i8p2L4T2FWlvLNEORXaOI673+vuPe7e097eXsNXKi2l8zRERCqqGhru/ktgp5ldEIquBV4E1gP5FVCrgcfC6/XArWEV1XJgMEwtbQBWmNm8cAB8BbAhfHbYzJaHVVO3Fu2rVBvTIpWIk805mayCQ0SklESN9X4f+JqZNQA7gNuIAucRM7sdeAO4OdR9HLgB2A4Mhbq4+34z+yzwTKj3GXffH15/FPgK0Ag8ER4AnyvTxrQovBFTIq5TWEREitUUGu7+c6CnxEfXlqjrwB1l9vMA8ECJ8l7g4hLl+0q1MV0KQ6M5dapaFRE5c+if0wXSyTiAztUQESlDoVEglYz+cwzrrHARkZIUGgVSCY00REQqUWgUGD+moZGGiEhJCo0CEyMNhYaISCkKjQLpZH71lKanRERKUWgUGB9paHpKRKQkhUaB8dVTGmmIiJSk0CigA+EiIpUpNAroQLiISGUKjQITlxHR9JSISCkKjQITlxHRSENEpBSFRoGGRP4yIhppiIiUotAoEI8ZybhppCEiUoZCo0gqEdfqKRGRMhQaRVKJmA6Ei4iUodAoEoWGRhoiIqUoNIqkk3GFhohIGQqNIg2JmFZPiYiUUVNomNlrZrbFzH5uZr2hbL6ZbTSzbeF5Xig3M7vHzLab2WYzu7xgP6tD/W1mtrqg/Iqw/+1hW6vUxnRKaaQhIlLWZEYa73P3S929J7y/E9jk7t3ApvAe4HqgOzzWAGshCgDgLuA9wJXAXQUhsDbUzW+3skob0yaViDGikYaISElvZXpqFbAuvF4H3FRQ/pBHngLazOws4Dpgo7vvd/cDwEZgZfis1d2fdHcHHiraV6k2po0OhIuIlFdraDjwPTN71szWhLJF7r4bIDx3hPLFwM6CbftCWaXyvhLlldqYNjoQLiJSXqLGele5+y4z6wA2mtlLFepaiTI/ifKahSBbA7B06dLJbHoCTU+JiJRX00jD3XeF537gW0THJPaEqSXCc3+o3gcsKdi8E9hVpbyzRDkV2iju373u3uPuPe3t7bV8pbJSCY00RETKqRoaZtZsZi3518AK4HlgPZBfAbUaeCy8Xg/cGlZRLQcGw9TSBmCFmc0LB8BXABvCZ4fNbHlYNXVr0b5KtTFtUkmdES4iUk4t01OLgG+FVbAJ4P+6+3fN7BngETO7HXgDuDnUfxy4AdgODAG3Abj7fjP7LPBMqPcZd98fXn8U+ArQCDwRHgCfK9PGtImmpzTSEBEppWpouPsO4JIS5fuAa0uUO3BHmX09ADxQorwXuLjWNqaTpqdERMrTGeFF0skYo9kcudykjsWLiMwKCo0iuk+4iEh5Co0iuk+4iEh5Co0iqWQ+NDTSEBEpptAoMj49pRVUIiInUGgU0fSUiEh5Co0i6WQ00hjWSENE5AQKjSIaaYiIlKfQKDIRGhppiIgUU2gUSSXz52lopCEiUkyhUWR8pKFjGiIiJ1BoFEkndUa4iEg5Co0i+ZHGsG7EJCJyAoVGER0IFxEpT6FRRAfCRUTKU2gU0YFwEZHyFBpFEjEjZpqeEhEpRaFRxMxIJ+OanhIRKUGhUUIqEdO1p0RESqg5NMwsbmbPmdl3wvtlZva0mW0zs783s4ZQngrvt4fPuwr28elQ/rKZXVdQvjKUbTezOwvKS7Yx3aL7hGukISJSbDIjjY8DWwvefx74grt3AweA20P57cABd3878IVQDzO7CLgFeCewEvhyCKI48CXgeuAi4MOhbqU2plUqGdMxDRGREmoKDTPrBD4A3BfeG3AN8Giosg64KbxeFd4TPr821F8FPOzuI+7+KrAduDI8trv7DncfBR4GVlVpY1qlEjGtnhIRKaHWkcZfA38E5P8mXQAcdPdMeN8HLA6vFwM7AcLng6H+eHnR
NuXKK7VxHDNbY2a9ZtY7MDBQ41cqT9NTIiKlVQ0NM/sg0O/uzxYWl6jqVT6bqvITC93vdfced+9pb28vVWVS0kkdCBcRKSVRQ52rgBvN7AYgDbQSjTzazCwRRgKdwK5Qvw9YAvSZWQKYC+wvKM8r3KZU+d4KbUyrVCLO0GimekURkVmm6kjD3T/t7p3u3kV0IPv77v5bwA+AD4Vqq4HHwuv14T3h8++7u4fyW8LqqmVAN/BT4BmgO6yUaghtrA/blGtjWqUSOhAuIlLKWzlP41PAJ8xsO9Hxh/tD+f3AglD+CeBOAHd/AXgEeBH4LnCHu2fDKOJjwAai1VmPhLqV2phWWj0lIlJaLdNT49z9h8APw+sdRCufiusMAzeX2f5u4O4S5Y8Dj5coL9nGdEsn4wyNaHpKRKSYzggvoX1Oir1HRolmyEREJE+hUUJ7S4rRbI7BY2P17oqIyGlFoVFCR2sagD2HRurcExGR04tCo4SOlhQA/YeH69wTEZHTi0KjhEVhpNGvkYaIyHEUGiVMjDQUGiIihRQaJTSnEjQ3xDU9JSJSRKFRRkdrWiMNEZEiCo0y2ltS9B/SSENEpJBCo4yOlpRGGiIiRRQaZSxqTdN/aERnhYuIFFBolNHRkuLYWJYjugaViMg4hUYZHa1adisiUkyhUUZHi07wExEpptAoQ5cSERE5kUKjDI00REROpNAoo7UxQSoR00hDRKSAQqMMM6OjVedqiIgUUmhU0NGS1vSUiEiBqqFhZmkz+6mZ/cLMXjCzPwvly8zsaTPbZmZ/b2YNoTwV3m8Pn3cV7OvTofxlM7uuoHxlKNtuZncWlJds41SJzgrX9JSISF4tI40R4Bp3vwS4FFhpZsuBzwNfcPdu4ABwe6h/O3DA3d8OfCHUw8wuAm4B3gmsBL5sZnEziwNfAq4HLgI+HOpSoY1ToqMlpZGGiEiBqqHhkSPhbTI8HLgGeDSUrwNuCq9XhfeEz681MwvlD7v7iLu/CmwHrgyP7e6+w91HgYeBVWGbcm2cEh2taQ6PZDg2mj2VzYqInLZqOqYRRgQ/B/qBjcArwEF3z19jow9YHF4vBnYChM8HgQWF5UXblCtfUKGN4v6tMbNeM+sdGBio5SvVROdqiIgcr6bQcPesu18KdBKNDN5Rqlp4tjKfTVV5qf7d6+497t7T3t5eqspJ6cjf9lUrqEREgEmunnL3g8APgeVAm5klwkedwK7wug9YAhA+nwvsLywv2qZc+d4KbZwS4yMNHdcQEQFqWz3VbmZt4XUj8BvAVuAHwIdCtdXAY+H1+vCe8Pn3Pbq++HrglrC6ahnQDfwUeAboDiulGogOlq8P25Rr45TQ9JSIyPES1atwFrAurHKKAY+4+3fM7EXgYTP7c+A54P5Q/37g78xsO9EI4xYAd3/BzB4BXgQywB3ungUws48BG4A48IC7vxD29akybZwS85oaSMSMPRppiIgANYSGu28GLitRvoPo+EZx+TBwc5l93Q3cXaL8ceDxWts4VWIxi277qpGGiAigM8Kr6mhNM6AD4SIigEKjKp3gJyIyQaFRRUdLij2anhIRARQaVS2Z38TBoTEODY/VuysiInWn0Kiia0ETAG/sG6pzT0RE6k+hUcU5C5oBeG3f0Tr3RESk/hQaVZwTRhqva6QhIqLQqKapIUFHS4rX9mqkISKi0KhB14JmjTRERFBo1OScBU06piEigkKjJl0Lm+k/PMLQaKZ6ZRGRGUyhUQMdDBcRiSg0atAVlt2+rikqEZnlFBo1WBpGGq9ppCEis5xCowat6SQLmhs00hCRWU+hUaNzFjTx2l6NNERkdlNo1Cg6V0MjDRGZ3RQaNepa2MyuwWGGx7L17oqISN0oNGqUX3a7c7+mqERk9qoaGma2xMx+YGZbzewFM/t4KJ9vZhvNbFt4nhfKzczuMbPtZrbZzC4v2NfqUH+bma0uKL/CzLaEbe4xM6vURj10jV/tVqEhIrNXLSONDPCH7v4O
YDlwh5ldBNwJbHL3bmBTeA9wPdAdHmuAtRAFAHAX8B7gSuCughBYG+rmt1sZysu1ccrpXA0RkRpCw913u/vPwuvDwFZgMbAKWBeqrQNuCq9XAQ955CmgzczOAq4DNrr7fnc/AGwEVobPWt39SXd34KGifZVq45Sb25SkrSmpa1CJyKw2qWMaZtYFXAY8DSxy990QBQvQEaotBnYWbNYXyiqV95Uop0Ibxf1aY2a9ZtY7MDAwma80KefoarciMsvVHBpmNgf4BvAH7n6oUtUSZX4S5TVz93vdvcfde9rb2yez6aR06Wq3IjLL1RQaZpYkCoyvufs3Q/GeMLVEeO4P5X3AkoLNO4FdVco7S5RXaqMuli1s5s0Dxxg4PFLPboiI1E0tq6cMuB/Y6u5/VfDReiC/Amo18FhB+a1hFdVyYDBMLW0AVpjZvHAAfAWwIXx22MyWh7ZuLdpXqTbq4sZLzsaBB3/8aj27ISJSN7WMNK4Cfhu4xsx+Hh43AJ8D3m9m24D3h/cAjwM7gO3A3wL/FcDd9wOfBZ4Jj8+EMoCPAveFbV4Bngjl5dqoi3Pb53D9xW/j7556ncPDY/XsiohIXVi0YGnm6Onp8d7e3mnb/+a+g9z4Nz/mj2+4kDVXnzdt7YiInEpm9qy791SrpzPCJ+lXOtt473kLuO9fX2Uko0uKiMjsotA4Cb/3a+fRf3iEbz/3Zr27IiJySik0TsK/717IO89u5f/88w5yuZk1vSciUolC4ySYGb/7a+exY+9RNr1U11XAIiKnlELjJN1w8ds4e26aB36k5bciMnsoNE5SIh7j1vd28eSOfbywa7De3REROSUUGm/Bh9+9lMZknAd//Fq9uyIickooNN6CuU1Jbu7pZP3Pd9F/eLje3RERmXYKjbfotquWMZrN8dWn3qh3V0REpp1C4y1atrCZay/s4GtPvc7Lvzxc7+6IiEwrhcYU+P1ruzk2luW6v/4XPvLgT/nJK3vr3SURkWmh0JgCly5p48efuoZPrjif5988xG/+7dN87omXmGnX9RIRUWhMkXnNDXzsmm5+9Kn38ZvvWcr//udXuPMbW8hkc/XumojIlEnUuwMzTToZ5+6bLmbhnBT3bNrGgaFR/sd1F/D2jjlEtwsRETlzKTSmgZnxifefz/ymJH/2nRf53ot7mN/cwPJz5/MnH7iIxW2N9e6iiMhJUWhMo49ctYxr37GIJ1/Zx1Ov7uOJLb9kZOx57v/Iu+vdNRGRk6LQmGZL5jexZH4T//HdS+jueIXPf/clfrJ9L+99+8J6d01EZNJ0IPwUuu2qLha3NfLn/7RVl1QXkTNS1dAwswfMrN/Mni8om29mG81sW3ieF8rNzO4xs+1mttnMLi/YZnWov83MVheUX2FmW8I291g4WlyujTNZOhnnj1ZewIu7D/FN3cBJRM5AtYw0vgKsLCq7E9jk7t3ApvAe4HqgOzzWAGshCgDgLuA9wJXAXQUhsDbUzW+3skobZ7QbLzmbS5a08RcbXubYqG4XKyJnlqqh4e7/AuwvKl4FrAuv1wE3FZQ/5JGngDYzOwu4Dtjo7vvd/QCwEVgZPmt19yc9OhPuoaJ9lWrjjGZm/OkH3sEvDw3zl997ud7dERGZlJM9prHI3XcDhOeOUL4Y2FlQry+UVSrvK1FeqY0z3ru75vPby8/hvh+9ynef313v7oiI1GyqD4SXOnvNT6J8co2arTGzXjPrHRgYmOzmdfGnH3wHlyxp45P/sJkdA0fq3R0RkZqcbGjsCVNLhOf8jbL7gCUF9TqBXVXKO0uUV2rjBO5+r7v3uHtPe3v7SX6lUyuViPPl37qcZNz46Fd/xtGRTL27JCJS1cmGxnogvwJqNfBYQfmtYRXVcmAwTC1tAFaY2bxwAHwFsCF8dtjMlodVU7cW7atUGzPG4rZGvnjLZfxb/2Eu++xGVn3px/zpt7ew/he7ODg0Wu/uiYicwKpdidXMvg78OrAQ2EO0CurbwCPAUuAN
4GZ33x/+4v8bohVQQ8Bt7t4b9vM7wB+H3d7t7g+G8h6iFVqNwBPA77u7m9mCUm1U+0I9PT3e29tb6/c/Lfzklb3888sDbO4b5Pk3Bzk8kiFmcPnSeVy5bD4XvK2FC97Wwtvb55CI69QaEZl6Zvasu/dUrTfTLt99JoZGoWzO+UXfQX74Uj8/eHmArbsPkQknAp67sJl7PnwZFy+eW+deishMo9CYIUYzOXbsPcKWvkH+8nv/xr6jI3xq5YX8zlXLiMV01VwRmRoKjRnowNFRPvWNzXzvxT28a/Fc3ndhB+89bwGXLmkjnYzXu3sicgZTaMxQ7s7Dz+zk4Z++wZY3B8k5xGPG0vlNnLuwmSXzm5iTStCcStA5r5Eb3nUWcY1IRKQKhcYsMHhsjJ++up/NfQfZMXCUVwaO8OaBYxwdzZC/HuIlnXP5n//hXbzzbB0HEZHyFBqzmLtzbCzL/9vaz2f+8QUODI3xm1cu5ZwFTTQkYqQSMeY2NjC/uYEFcxroWtCs0YjILFdraOh+GjOQmdHUkODGS87m6u6F/K/HX+JrT79OuauxNzfE+ZXONi5Z0kZLeuKPxPzmBt42N81Zc9Ocu3AODQkt9xWZ7TTSmCVGMllGMjlGMzmGx7IcHBrjwNAoew6NsLnvIM+9cZAXdx8iWyZZWlIJrj6/nfdd2MHV5y+koyV9ir+BiEwnjTTkOKlEnFRiYoVVZ8HdST50RXQll0w2Nz4aybmz/+gouweH6TswxJOv7GPTS/3805boAovntTfzq+ctoGtBM6lEjIbwSMajR3tLiovOatWqLpEZRqEh44rPNj+7rZGz2xq54px5rLp0Mbmc88KuQ/z4lb08+co+vvmzNxmqcE+QRMw4f1EL3Yvm0NaYpLUxSVtTAwvnNNA+J0XXwmbObmuc7q8lIlNIoSE1i8WMd3XO5V2dc/m9XzuPTDbH0dEso5kco9kcY5kcY9kcI5kcbx48xua+g2zuG+S5Nw4yeGyMQ8NjFM6Gxgw+8t5lfGLF+cxJ6Y+iyJlA/6fKSUvEY8xtLH1w/OLFc7nunW87riyXcw4Nj7H3yAgDh0f5zuZdPPiTV3l8y24+ed0FXHRWK+0tKeY3N2g1l8hpSgfCpa5+9sYB/uRbz7N196HxspjBwjkpFrWmaW9J0ZKOTlZsSSVobUwytzFJW1OS5oYE6WScxoY4cxuTzG9uoDWdINxmXkQmQQfC5Yxw+dJ5/OPHruIXfYP0Hxpm75ER+g+P0H9ohD2Hh/nl4DDb+zMcHclweCTDaCZXcX/JuNHW1MC8puj4ydzGJC2pBHPSCRa3NfKuxXN55+K5zG1MnqJvKDKzKDSk7hLxGFecM696RWB4LMvgsTEODo1xbCzLsdEsx8YyHBwaY//RUfYdHeXA0VEODI1y4OgYO/cPcWQkw+HhDIPHxsb3s6g1xVlzGzm7LU37nBQt6SRz0gkak3HyAxULfUvEjGQ8RiJuJGLRyZGtYbTTmk7SEI8RjxvJuB23Qk1kJlJoyBklnYyTTsZZ1Dr580T2HRnhhV2H2PLmIK/tPcruwWFe2n2YHx3Zy5GRTNmTHyejMRkfP9O+qSHqazoRp6khTlMqTlNDglQiRiIWI5mIQqYxGSedjJYqx2NGzIz8IR0Lr2MxI26GGRjRcyIWncTZlIraiYe6ZkYiZlGQxaJ9JuOmaTuZEgoNmTUWzElx9fntXH3+ibcEzl965VjBEuKcR/c3GctGq8Ki185IJhrt5B9jWSeXc0azOQ4OjbLvSDTiOTaaZf/RUYbHsgyNRvs+OpphJJOjHocS4zGjIT5xTk3+dSJm4yOqeHjEDGJm46OrZDwqT4QQSiViNKcSNKei0MufozNxrk40OmuIx0gmYiRjsSgIw35jBQEXM8bbLexfOhGFYSoR020ATiMKDREmLr3S1HBq/pfI5jxaqpzJcWwsy/BYNgomd7I5Hw8V9+hEy6w77k7OozJ3J5NzhkazDI1mGB7Lkgt1c7lo
H5kQctlcjrHsRPiNZqJl0YVlmbDNWDYKtGwuanN4LEcmmwn78fH+jYxlOTKS4ehotuxVBKZSzCARixGLEUZU0WgrlYxCqzEZJxG38ZFaFIR23OjNIEw9TgRQPBZCLNSJG8RjMVrSCVrS0Z+HfIil4jHSDRMjw+MCMcZxo8SoD2HaMjYRuBZjvH/xMHo80wJRoSFSB/GY0dgQVn5x5h6Uz4dXPgBHQwiNZaOysWxUlskWhp7jhDAMATkRchPBNjyWZTiT41gIpnxg5XI+HpAjod7QaIZMdqJONudkss7hsQw59/HwLRzheeh/ft/5OmOZHEdGMlM2ZVmNjY++CkeDcZLxiXCMjU9NRv/AyWdf/n3eA6vfzdIFTdPaX4WGiJw0MxufimpO1bs3Uys/ZTmWcUay2RBkubD4Iksmmw/JfPhF4ZPz6JykaPQWfZ6/RE8+KKOpz+izfJjmCgJ4JJMjk82Nh2su5yHkJsLOIXpT4FRcVPS0Dw0zWwl8EYgD97n75+rcJRGZBfJTljQAZ/BocKqd1te6NrM48CXgeuAi4MNmdlF9eyUiMnud1qEBXAlsd/cd7j4KPAysqnOfRERmrdM9NBYDOwve94UyERGpg9M9NEqtRTthPYOZrTGzXjPrHRgYOAXdEhGZnU730OgDlhS87wR2FVdy93vdvcfde9rbTzxxS0REpsbpHhrPAN1mtszMGoBbgPV17pOIyKx1Wi+5dfeMmX0M2EC05PYBd3+hzt0SEZm1TuvQAHD3x4HH690PERGZgTdhMrMB4PWT3HwhsHcKu3OmmI3fezZ+Z5id31vfuTbnuHvVg8IzLjTeCjPrreXOVTPNbPzes/E7w+z83vrOU+t0PxAuIiKnEYWGiIjUTKFxvHvr3YE6mY3fezZ+Z5id31vfeQrpmIaIiNRMIw0REamZQiMws5Vm9rKZbTezO+vdn+lgZkvM7AdmttXMXjCzj4fy+Wa20cy2hed59e7rVDOzuJk9Z2bfCe+XmdnT4Tv/fbjiwIxiZm1m9qiZvRR+81+d6b+1mf338Gf7eTP7upmlZ+JvbWYPmFm/mT1fUFbyt7XIPeHvts1mdvlbaVuhway6b0cG+EN3fwewHLgjfM87gU3u3g1sCu9nmo8DWwv8hBLZAAACzElEQVTefx74QvjOB4Db69Kr6fVF4LvufiFwCdH3n7G/tZktBv4b0OPuFxNdReIWZuZv/RVgZVFZud/2eqA7PNYAa99KwwqNyKy4b4e773b3n4XXh4n+EllM9F3XhWrrgJvq08PpYWadwAeA+8J7A64BHg1VZuJ3bgWuBu4HcPdRdz/IDP+tia5y0WhmCaAJ2M0M/K3d/V+A/UXF5X7bVcBDHnkKaDOzs062bYVGZNbdt8PMuoDLgKeBRe6+G6JgATrq17Np8dfAHwG58H4BcNDdM+H9TPy9zwUGgAfDtNx9ZtbMDP6t3f1N4C+AN4jCYhB4lpn/W+eV+22n9O83hUakpvt2zBRmNgf4BvAH7n6o3v2ZTmb2QaDf3Z8tLC5Rdab93gngcmCtu18GHGUGTUWVEubwVwHLgLOBZqKpmWIz7beuZkr/vCs0IjXdt2MmMLMkUWB8zd2/GYr35Ier4bm/Xv2bBlcBN5rZa0TTjtcQjTzawhQGzMzfuw/oc/enw/tHiUJkJv/WvwG86u4D7j4GfBN4LzP/t84r99tO6d9vCo3IrLhvR5jLvx/Y6u5/VfDRemB1eL0aeOxU9226uPun3b3T3buIftfvu/tvAT8APhSqzajvDODuvwR2mtkFoeha4EVm8G9NNC213Myawp/1/Hee0b91gXK/7Xrg1rCKajkwmJ/GOhk6uS8wsxuI/gWav2/H3XXu0pQzs38H/CuwhYn5/T8mOq7xCLCU6H+8m929+CDbGc/Mfh34pLt/0MzOJRp5zAeeA/6zu4/Us39TzcwuJTr43wDsAG4j+ofijP2tzezPgP9EtFLwOeC/EM3fz6jf2sy+Dvw60dVs
9wB3Ad+mxG8bAvRviFZbDQG3uXvvSbet0BARkVppekpERGqm0BARkZopNEREpGYKDRERqZlCQ0REaqbQEBGRmik0RESkZgoNERGp2f8HO2bfCL1l0CcAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot token frequency against rank (0-99) for the 100 most common tokens.\n",
    "plt.plot(x, frequiences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x20ed5ed81d0>]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAD8CAYAAABw1c+bAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xl0XPV99/H3V9to36zFsiRbNl6w4xVkkwABAwXMEpYSSAjPCWmhfmhJ0+dp+hRyaEubpaWh6ZKlaTjgQhoCAdIAZTNmj1ktgzew8b7IsrVYlrXY2r/PHzMmwh7ZsqTRSDOf1zk+0v3NvXO/91z4zNVvfvd3zd0REZH4kRDtAkREZGQp+EVE4oyCX0Qkzij4RUTijIJfRCTOKPhFROKMgl9EJM4o+EVE4oyCX0QkziRFu4BwCgoKvKKiItpliIiMGatXr25w98KBrDsqg7+iooKqqqpolyEiMmaY2a6BrquuHhGROKPgFxGJMwp+EZE4o+AXEYkzJw1+M1tmZnVmtqFP23fMbJ2ZrTGzF81sQj/b9oTWWWNmTw9n4SIiMjgDueJ/EFhyTNu97j7X3ecDzwB/08+2R9x9fujfVUOoU0REhslJg9/d3wAaj2lr7rOYAegxXiIiY8Sg+/jN7Htmtge4if6v+FPNrMrM3jGzawa7r4Fwd3708hZe31wfyd2IiIx5gw5+d7/L3cuBh4Gv97PaRHevBL4C/KuZndbf+5nZ0tCHRFV9/amHt5lx32+38+qmulPeVkQkngzHqJ5fAteFe8Hda0I/twOvAQv6exN3v8/dK929srBwQHcdH6cgM0BDa8egthURiReDCn4zm9Zn8SpgU5h18swsEPq9ADgH+Ggw+xuogswUBb+IyEmcdK4eM3sEWAwUmFk1cDdwuZnNAHqBXcBtoXUrgdvc/VZgJvAzM+sl+AFzj7tHOPgDbKlrjeQuRETGvJMGv7vfGKb5gX7WrQJuDf3+FjBnSNWdooLMAG9vPzCSuxQRGXNi6s7dgswATYe76OrpjXYpIiKjVmwFf1YKAAdaO6NciYjI6BVbwZ8ZANAXvCIiJxCTwV+v4BcR6VdMBX/h0Sv+FgW/iEh/Yir4j/bxN6iPX0SkXzEV/OkpSaSnJKqPX0TkBGIq+EHTNoiInEwMBr+mbRAROZEYDP4ADS3q4xcR6U/sBX+WunpERE4k9oI/M0Dj4U66NW2DiEhYMRf8hZkpuEPjYXX3iIiEE3PB/8m0DernFxEJK+aCf5zm6xEROaGYC/6CzKN37yr4RUTCib3gz9IVv4jIicRc8GcFkkhJStB8PSIi/RhQ8JvZMjOrM7MNfdq+Y2brzGyNmb1oZhP62fZmM9sS+nfzcBV+glop1LQNIiL9GugV/4PAkmPa7nX3ue4+H3gG+JtjNzKzfIIPZz8LWATcbWZ5gy93YILTNuiKX0QknAEFv7u/ATQe09bcZzED8DCbXgqscPdGdz8IrOD4D5BhF5y2QVf8IiLhJA1lYzP7HvBV4BBwQZhVSoE9fZarQ20RVZAZYP3eQ5HejYjImDSkL3fd/S53LwceBr4eZhULt1m49zKzpWZWZWZV9fX1QymLgqwUDrR10tsbdlciInFtuEb1/BK4Lkx7NVDeZ7kMqAn3Bu5+n7tXuntlYWHhkIopyAzQ0+s0Heka0vuIiMSiQQe/mU3rs3gVsCnMasuBS8wsL/Sl7iWhtogq0N27IiL9GlAfv5k9AiwGCsysmuBIncvNbAbQC+wCbgutWwnc5u63unujmX0HWBV6q2+7e+NxOxhmBX0euj69OCvSuxMRGVMGFPzufmOY5gf6WbcKuLXP8jJg2aCqG6TC0EPX63XFLyJynJi7cxf6dvVoLL+IyLFiMvhz0pJJTjT18YuIhBGTwW9mFGQGqG1uj3YpIiKjTkwGP8Dkggy21bdFuwwRkVEnZoN/enEWW2tbcNdNXCIi
fcVs8E8tyqSts4eaQ+ruERHpK2aD/+j4/S21LVGuRERkdInZ4J9WlAnAltrWKFciIjK6xGzw52WkUJAZYEudrvhFRPqK2eCH4FX/Zl3xi4h8SkwH//TiTLbWtWpkj4hIHzEd/FOLs2jt6GafRvaIiHwipoP/ky9469TdIyJyVEwHv4Z0iogcL6aDPz8jhXEZKRrSKSLSR0wHP8C04kw2a0iniMgnYj/4i7LYWquRPSIiR8V88E8vzqSlo5v9mqJZRAQYQPCb2TIzqzOzDX3a7jWzTWa2zsx+Y2a5/Wy708zWm9kaM6sazsIHamrR0S941c8vIgIDu+J/EFhyTNsKYLa7zwU2A986wfYXuPt8d68cXIlDM704OKRzs0b2iIgAAwh+d38DaDym7UV37w4tvgOURaC2YTEuM0C+RvaIiHxiOPr4/xB4vp/XHHjRzFab2dITvYmZLTWzKjOrqq+vH4ayfmdaUaYmaxMRCRlS8JvZXUA38HA/q5zj7mcAlwG3m9l5/b2Xu9/n7pXuXllYWDiUso4zrTiTLRrZIyICDCH4zexm4ErgJu8nUd29JvSzDvgNsGiw+xuK6cVZGtkjIhIyqOA3syXAHcBV7n64n3UyzCzr6O/AJcCGcOtG2rTQyB5N0SwiMrDhnI8AbwMzzKzazG4BfgxkAStCQzX/I7TuBDN7LrRpMbDSzNYC7wHPuvsLETmKkzg6skdz9oiIQNLJVnD3G8M0P9DPujXA5aHftwPzhlTdMBmXGWBcRoqGdIqIEAd37h41rVhP4xIRgTgK/unFWXoal4gIcRT800JP46rR07hEJM7FTfBPL9LUDSIiEE/Br6dxiYgAcRT8eRkpFGQG9AWviMS9uAl+CI7n1xW/iMS7OAv+LLbUtdLbq5E9IhK/4ir4pxVncrizh71NR6JdiohI1MRV8H/yBa+maBaROBZfwa/J2kRE4iv4c9KTKcoKaCy/iMS1uAp+gM9MyObd7Y109/RGuxQRkaiIu+D/0sJy9jYd4aWNtdEuRUQkKuIu+C+eNZ6yvDSWrdwZ7VJERKIi7oI/McH42tkVvLezkQ17D0W7HBGRERd3wQ9ww8JyMlISWfbmjmiXIiIy4gby6MVlZlZnZhv6tN1rZpvMbJ2Z/cbMcvvZdomZfWxmW83szuEsfCiyU5O5vrKc/1lbQ12LpmkWkfgykCv+B4Elx7StAGa7+1xgM/CtYzcys0TgJ8BlwCzgRjObNaRqh9HNZ1fQ3ev84p3d0S5FRGREnTT43f0NoPGYthfdvTu0+A5QFmbTRcBWd9/u7p3Ao8DVQ6x32EwuyODCGUU88t5uejR3j4jEkeHo4/9D4Pkw7aXAnj7L1aG2UeOLZ5ZR39LBW9saol2KiMiIGVLwm9ldQDfwcLiXw7T1e2ltZkvNrMrMqurr64dS1oBdcHoRWalJPPlBzYjsT0RkNBh08JvZzcCVwE0e/gnm1UB5n+UyoN+Edff73L3S3SsLCwsHW9YpSU1O5LLZ41n+4X6OdPaMyD5FRKJtUMFvZkuAO4Cr3P1wP6utAqaZ2WQzSwG+DDw9uDIj55r5pbR2dOtOXhGJGwMZzvkI8DYww8yqzewW4MdAFrDCzNaY2X+E1p1gZs8BhL78/TqwHNgIPObuH0boOAbtrCnjKM4O8NSavdEuRURkRCSdbAV3vzFM8wP9rFsDXN5n+TnguUFXNwISE4yr5k3gP9/cycG2TvIyUqJdkohIRMXlnbvHunp+Kd29zrPr90W7FBGRiFPwE5yqeWpRJr98dzeNbZ3RLkdEJKIU/ICZ8fULprK5toWLfvAaT6yuJvxAJRGRsU/BH3LNglKe+ca5TCnM5C8eX8tXl71HR7eGeIpI7FHw93H6+Gwe/9+f4++u+gy/3dLA957dGO2SRESG3UlH9cSbhATj5rMr2NN4mPtX7mDR5HyunDsh2mWJiAwbXfH3447LTueMibnc+ev17Ghoi3Y5IiLDRsHfj+TEBH78lTNI
SjT+6OdVvLBhH53dekC7iIx9Cv4TmJCbxo9vPIPmI13c9ov3WfT3L/EPz2/UNM4iMqYp+E/i3GkFvHXnhTz4BwtZWJHPz17fzhtbRmb2UBGRSFDwD0BSYgKLZxTx468sICuQxLPrdIeviIxdCv5TEEhK5OJZxbz44X7194vImKXgP0WXzymhub2bN/XULhEZoxT8p+jz0wvU3SMiY5qC/xSpu0dExjoF/yBcMVfdPSIydin4B+HcaeruEZGxS8E/COruEZGxbCDP3F1mZnVmtqFP2/Vm9qGZ9ZpZ5Qm23Wlm60PP5a0arqJHgyvnBbt7Xvu4LtqliIickoFc8T8ILDmmbQPw+8AbA9j+Anef7+79fkCMRedNK6QoK8Cjq/ZEuxQRkVNy0uB39zeAxmPaNrr7xxGragxISkzg+soyXvu4jn2HjkS7HBGRAYt0H78DL5rZajNbGuF9jbgvVU6k1+GxVdXRLkVEZMAiHfznuPsZwGXA7WZ2Xn8rmtlSM6sys6r6+rExCdrEcemcO7WAx6r2aMZOERkzIhr87l4T+lkH/AZYdIJ173P3SnevLCwsjGRZw+rLi8rZ23SElVs1pl9ExoaIBb+ZZZhZ1tHfgUsIfikcUy6eVUx+RgqPvrc72qWIiAzIQIZzPgK8Dcwws2ozu8XMrjWzauBzwLNmtjy07gQzey60aTGw0szWAu8Bz7r7C5E5jOgJJCVy3RmlrPiolvd3H4x2OSIiJ2Xuo69vurKy0quqxs6w/10H2vjCj1bS3N7NmZPy+NrZFVw+p4TEBIt2aSISJ8xs9UCHzevO3WEwaVwGK++8kL++chb1LR386SMf8Ps/fYtN+5ujXZqIyHEU/MMkOzWZW86dzKt/sZh/+dI8qhsPc+UPV/L9FzbR2tEd7fJERD6hrp4IOdjWyXef3civ368mIyWRq+ZP4MsLJzK3LAczdQGJyPBSV88okJeRwg9umMeTt5/DFXNLePKDGq7+yZv8+2vbol2aiMQ5BX+EzS/P5ftfnMe7d13ExbOK+eHLW9jbpCkeRCR6FPwjJDs1mbu/MAuA77+wKcrViEg8U/CPoLK8dJaeN4Wn1tSwepfG/ItIdCj4R9ht559GcXaAbz/zEb2a30dEokDBP8IyAkn85aWns3ZPEw+s3MFoHFUlIrFNwR8F1y4o5fzphXzvuY3c+lAVtc3t0S5JROKIgj8KEhKMZV9byF9fOYs3tzVw8T+/zgMrd9Dc3hXt0kQkDugGrijb0dDGt/57He9sbyQtOZFrFkzg0s+Mp2JcBqV5aSQn6rNZRE7uVG7gUvCPEuurD/GLd3bx1Nq9tHf1ApCYYFy7oJR/un5elKsTkdFOwT+GNbd3sWlfC7sOtPHm1gaeXFPDQ3+4iPOnj52H04jIyDuV4E+KdDFyarJTk1k0OZ9Fk/O5en4p7+9u4h+e28i5Uws0zbOIDAt1II9iKUkJ/OWSGWza38Kv39cD3UVkeCj4R7kr5pQwvzyXH7z4MUc6e6JdjojEAAX/KGdm3HXFTGqbO/jHFzaxetdBdjS06UNARAbtpH38ZrYMuBKoc/fZobbrgb8FZgKL3D3sN7FmtgT4NyARuN/d7xmmuuPKwop8rphbwoNv7eTBt3YCwRE/s0qyWViRz3nTCzh/eqHm+ReRATnpqB4zOw9oBX7eJ/hnAr3Az4C/CBf8ZpYIbAYuBqqBVcCN7v7RyYqK51E9/enpdT6sOcSBtk4aWzvZeaCNVTsb+WB3Ex3dvVw2ezzfu3YO+Rkp0S5VRKJgWEf1uPsbZlZxTNvG0I5OtOkiYKu7bw+t+yhwNXDS4JfjJSYYc8tyj2vv7O5l2Zs7+MGLH1O16yB/feUsSnPTAEhNTmDm+GwSNBpIRPqI5HDOUmBPn+Vq4Kz+VjazpcBSgIkTJ0awrNiSkpTAbeefxnnTCvnzx9bwjUc++NTr47NTuXJuCdcsKGV2aU6UqhSR0SSSwR/uMrPffiV3vw+4D4JdPZEq
KlbNmpDNU18/h9U7D9IVmu65oaWD5zfs46G3d3L/yh38v0tncPsFU6NbqIhEXSSDvxoo77NcBtREcH9xL5CUyNlTCz7Vdt2ZZRw63MXdT2/g3uUfAyj8ReJcJIN/FTDNzCYDe4EvA1+J4P6kHznpyfzghvmYmcJfRAY0nPMRYDFQYGbVwN1AI/AjoBB41szWuPulZjaB4LDNy92928y+DiwnOJxzmbt/GKkDkRNLTLBPJnu7d3nwZrBvXjJdQ0BF4tBARvXc2M9Lvwmzbg1weZ/l54DnBl2dDKuj4Z+anMCPX91KfUsH37t2Nkma+lkkrmiStjiTmGD8/bVzKMwM8MNXtnKgrZM/XjyFoqxUCrMCpCYnRrtEEYkwBX8cMjP+/JIZjMsM8Lf/8yEvbaz95LWsQBKFWQEKsgJ8qbKc684si2KlIhIJCv44dvPZFSyeUcj2hjbqWzp+96+1gy21LXzz8bVU7Wrk7i98Rn8JiMQQBX+cmzQug0njMo5r7+7p5QcrNvPT17axYW8z371mNrNLc/RMAJEYoOCXsJISE7hjyeksKM/lm4+t5eqfvElOWjKfnZLPBTOKuGx2CTnpydEuU0QGQY9elJM60NrBb7c08Na2Bt7ceoC9TUdISUxg8YxC/uSCqcwvP34OIREZWXrmrkSMu7N+7yGe/KCGJ9fsJSUxgTfvvFBdQCJRdirBrwHcckrMgrOE/s0XZvHda2azv7mdN7c2RLssETkFCn4ZtItmFpGTlswTq/U8YJGxRMEvgxZISuSqeRNY/uF+Dh3pinY5IjJACn4Zki+eWUZHdy/PrtsX7VJEZIAU/DIkc8tymFaUyROr95x8ZREZFRT8MiRmxhfPLOP93U1sr2+NdjkiMgAKfhmyaxeUkmDwuL7kFRkTFPwyZEXZqVx4ejH/8fo2bnlwFW9tbWA03h8iIkEKfhkW/3T9XL5x4TTW7GniK/e/y+U/XMmz6/bR26sPAJHRRnfuyrBq7+rh6bU1/Oz1bWyrb2NaUSZ/dN4UFk8vpCg7NdrlicSsYZ2ywcyWAVcCde4+O9SWD/wKqAB2Aje4+8Ew2/YA60OLu939qoEUpeAf+3p6nefW7+NHr2xhc23wS98phRmcc1oBV8wtYVFFPgma5kFk2Ax38J8HtAI/7xP83wca3f0eM7sTyHP3O8Js2+rumad6AAr+2NHb63xY08zb2xt4e9sB3tneyJGuHsZnp3L5nBLOnJTHnNIcyvPT9PxfkSEY9knazKwCeKZP8H8MLHb3fWZWArzm7jPCbKfgl0853NnNSxvreHrNXt7Y3EBnTy8A+RkpXLuglK9+blLY5wOIyImNRPA3uXtun9cPuntemO26gTVAN3CPuz85kKIU/PGho7uHzftbWb/3EG9ua2D5hv30uHPBjCL+12cncv70Is36KTJAoyn4J7h7jZlNAV4BLnL3bf3sYymwFGDixIln7tq1ayD1SwypbW7n4Xd388t3d9PQ2kFpbhpfWljOF+ZNYHKB/goQOZFR09VzzHs8GHqPJ062P13xx7fO7l5e2ljLL9/dzcrQlM9TCjO4eGYxt35+CoVZgShXKDL6nErwD/bRi08DNwP3hH4+FaaIPOCwu3eYWQFwDvD9Qe5P4khKUgKXzynh8jklVB88zMsb63hpYy0PrNzB65vreey2z5Gdqsc+igzWSW/gMrNHgLeBGWZWbWa3EAz8i81sC3BxaBkzqzSz+0ObzgSqzGwt8CrBPv6PInEQErvK8tK5+ewK/uuWs1j2tYVsrWvlj3+xms7u3miXJjJm6QYuGVN+vbqabz6+lmvmT+Cfb5ivewFEQkaiq0ckKq47s4z9ze3cu/xjehy+e/VsctLV7SNyKhT8Mub8yeLTAPiXFZt5b8cB7rluLhfMKIpyVSJjhyZpkzHHzLj9gqk8efs55KQl8wf/uYqvLnuPX6+uprldj4AUORn18cuY1tHdw89e386vVu1hb9MRUpISKM/73fQPmYEk
yvPTKctLozwvnQm5qZTlpVGWl05qcmKUqxcZPsM+jn+kKfjlVLk7H+xp4tl1+9h/qD3YhnPoSBd7Go9Q03SE7j5TRGelJnHTWZP4g3MqKNasoRIDFPwix+jpdWqb26lpOsLepiO8+FEtz6/fR2KCceHpRZTnpVOUHeC0wkwumFGk0UIy5mhUj8gxEhOMCblpTMhNoxK4en4puw8c5v6V23nt43pe31xPe1fw3oA5pTn81RUzOWvKuOgWLRIhuuIXIdhV1NLRzUsf1XLv8o/Zd6idC08v4vdmFrOwIo+pRZmaNlpGNV3xi5wiMyM7NZnfP6OMy2aX8MDK7Tz41i5e2VQHBKeNXjyjkEtmjee86QWkp+h/HRm7dMUv0g93Z+eBw6za0chb2xp4ZVMdze3dpCQmMLkgg6nFmUwrymRhRT5nTsrTKCGJKl3xiwwDM2NyQQaTCzK4YWE5XT29rNrRyBtbGthS28L66kM8t34f7sGJ5Son5TFpXAYFmSnkZ6SQnpJIcmICKUkJZAaSyE1PITctmfRAIoGkRAJJCQSSEtSFJCNOwS8yQMmJCZw9tYCzpxZ80tba0c17Ow7w5tYDvLejkRUf1dLY1kHvAP+QLs1N44q5wZlI55Xl6ENARoS6ekSGWU9v8P6BI109dHX30tHdS2tHN4eOdNJ0uIvDnT10dPfS3tVD1c5GVm5toKvHSUlMIDM1icxAEnkZKZTmplKam0Z5fjoV44J/eUzITdNTySQsdfWIRFFigpGfkTLg9Q8d7mLFxlq21LXQ1tFNa3s3B9o62bS/hZc31tHRZwrqtORE5pTmMK88hzllucwcn0VFQQbJiZp9RQZOwS8SZTnpyXzxzLKwr7k79S0d7GhoY0dDG5v2t7C2uomH3t5FZ/cOgE++bD46NUVZXhr5GSnkpaeQl5HCpPx08k7hg0hin4JfZBQzM4qyUynKTv3UDWWd3b1srm1hS10LH+9vZWtdK9UHD/P2tgbaOnuOe5+89GQmF2RQmpfO+OwA43PSOH96IVOLMkfycGSUUPCLjEEpSQnMLs1hdmnOp9rdneYj3TQe7uTg4U4OtHay60Ab2+rb2NHQyvrqJl481E5Hdy/fARZW5PHlhRO5aGYRuen6qyBeKPhFYoiZkZOeTE56MpPJCLuOu1Pb3MGTa/by6Hu7+ebjawGoGJfOvPJc5pTmMKskm5kl2eoiilEDGtVjZsuAK4E6d58dassHfgVUADuBG9z9YJhtbwb+KrT4XXd/6GT706gekZHR2+us3n2Q93Y0sq66ibV7DrG/uf2T10tz05g/MZcF5blMGpdBekoiqcmJZAaSyE5LIjs1mfSURA1DHQWGfXZOMzsPaAV+3if4vw80uvs9ZnYnkOfudxyzXT5QBVQCDqwGzgz3AdGXgl8kehpaO9i4r5mPappZV32INXua2Nt0pN/1UxITKMwKUJgVoCwvjfnluZwxKY/PTMgmkKS7mUfKsA/ndPc3zKzimOargcWh3x8CXgPuOGadS4EV7t4YKmwFsAR4ZCD7FZGRV5AZ4PPTCvn8tMJP2upa2qk91MGRrh6OdPXQ2t5Nc3sXzUe6aDzcSX1LB/UtHXywu4ln1u0DgkNPz59eyKWzi7lwRrGejTyKDKWPv9jd9wG4+z4zC/fQ01JgT5/l6lCbiIwhRVmpFGUN7IE1dc3tvL/7ICu3NrDio1pe+HA/ALnpyYzPTqUkJ5W89BRy01MYlxmc/G5WSba6i0ZQpL/cDXcmw/YtmdlSYCnAxIkTI1mTiERQUXYqS2aXsGR2Cd++ajZrq5t4e/sBapqOsK+pnf3N7WyubeXQkS5aO7q5d/nHnFaYwZVzJzCzJIvi7FRKctIozg7owyBChhL8tWZWErraLwHqwqxTze+6gwDKCHYJHcfd7wPug2Af/xDqEpFRIiHBWDAxjwUT88K+3tjWyfMb9vH0mhp++MoW+n7lmBlIYsb4LGaMz6I4K5Ws1CQyU5NISUzALDiC
KSs1iaKsAMXZqeSnp+jJaQM0lOB/GrgZuCf086kw6ywH/t7Mjp71S4BvDWGfIhJD8jNSuOmsSdx01iQOHe6iuukwtc3t7G1qZ0ttC5v2tfDM2hqa27tP+l4JBrnpKeSlJzOvLJc/vWgakwvCD2mNdwMKfjN7hOCVe4GZVQN3Ewz8x8zsFmA3cH1o3UrgNne/1d0bzew7wKrQW3376Be9IiJ9Be8/yOEzE3KOe627JzjRXUt7N109vTjB+xEOHemmrrmdupYODrR20Bi6ae35Dft5am0NN1SW8yeLT6M8P33kD2gU0+ycIhJz6lra+fdXt/Hwu7vo6nHK8tJYVJHP2VMLuGz2eDICsXfv6rCP4x9pCn4RGQ7VBw+z/MNaqnY2smpnIw2tnWQGkvjCvAlcd0Yp88pzY2ZmUwW/iMgx3J3Vuw7y6Ko9PLOuhvauXlKTE5hblsv88lyKQjehFWYGKM5JZXx26pj6y0DBLyJyAs3tXfx2cwOrdx1k9e6DbKxpprOn97j1slOTmDUhmzmhCfEm5KZRkBn8gMgcZR8KehCLiMgJZKcmc8XcEq6YWwL8blbT+tbgF8W1ze3sP9RB9cHDbKhpDj3/4NMfDAWZKUwpzOS0wgzGZ6dRkJVCQWaAOaEPiNFMwS8ica/vrKZTi7KOe72rp5cdDW3UNrdT39JBbXMHOxva2FbfyvIPa2ls6/zU+tOLMzl/eiEzS7I/+QshNz2Z9JQkMlISSYry9woKfhGRk0hOTGB6cRbTi4//UIDgg3EOtAU/EFbtaOT1zfU89NausN1HAIGkBDICSaQlJxJISgALTnMwLiPAY7d9LoJHEqTgFxEZopSkBEpy0ijJCc5O+kfnTaG9q4d9h4J/ITS0dtAcmqKiraOHw13dHO7ooa2zm87u4H0JOGSljkwkK/hFRCIgNTmRyQUZo/Lu4dgYwCoiIgOm4BcRiTMKfhGROKPgFxGJMwp+EZE4o+AXEYkzCn4RkTij4BcRiTOjcnZOM6sHdg1y8wKgYRjLGQvi8ZghPo87Ho8Z4vO4T/WYJ7l74UBWHJXBPxRmVjXQqUljRTweM8TnccfjMUN8Hnckj1ldPSIicUbBLyISZ2Ix+O+LdgFREI/HDPF53PF4zBCfxx32XBxxAAADmUlEQVSxY465Pn4RETmxWLziFxGRE4iZ4DezJWb2sZltNbM7o11PpJhZuZm9amYbzexDM/uzUHu+ma0wsy2hn3nRrnW4mVmimX1gZs+Elieb2buhY/6VmaVEu8bhZma5ZvaEmW0KnfPPxfq5NrP/G/pve4OZPWJmqbF4rs1smZnVmdmGPm1hz60F/TCUb+vM7Iyh7Dsmgt/MEoGfAJcBs4AbzWxWdKuKmG7gm+4+E/gscHvoWO8EXnb3acDLoeVY82fAxj7L/wj8S+iYDwK3RKWqyPo34AV3Px2YR/D4Y/Zcm1kp8A2g0t1nA4nAl4nNc/0gsOSYtv7O7WXAtNC/pcBPh7LjmAh+YBGw1d23u3sn8ChwdZRrigh33+fu74d+byEYBKUEj/eh0GoPAddEp8LIMLMy4Arg/tCyARcCT4RWicVjzgbOAx4AcPdOd28ixs81wScDpplZEpAO7CMGz7W7vwE0HtPc37m9Gvi5B70D5JpZyWD3HSvBXwrs6bNcHWqLaWZWASwA3gWK3X0fBD8cgKLoVRYR/wr8JXD06dXjgCZ37w4tx+I5nwLUA/8Z6uK638wyiOFz7e57gX8CdhMM/EPAamL/XB/V37kd1oyLleC3MG0xPVzJzDKBXwP/x92bo11PJJnZlUCdu6/u2xxm1Vg750nAGcBP3X0B0EYMdeuEE+rTvhqYDEwAMgh2cxwr1s71yQzrf++xEvzVQHmf5TKgJkq1RJyZJRMM/Yfd/b9DzbVH//QL/ayLVn0RcA5wlZntJNiNdyHBvwByQ90BEJvnvBqodvd3Q8tPEPwgiOVz/XvADnevd/cu4L+Bs4n9c31Uf+d2WDMuVoJ/
FTAt9M1/CsEvg56Ock0REerbfgDY6O7/3Oelp4GbQ7/fDDw10rVFirt/y93L3L2C4Ll9xd1vAl4FvhhaLaaOGcDd9wN7zGxGqOki4CNi+FwT7OL5rJmlh/5bP3rMMX2u++jv3D4NfDU0uuezwKGjXUKD4u4x8Q+4HNgMbAPuinY9ETzOcwn+ibcOWBP6dznBPu+XgS2hn/nRrjVCx78YeCb0+xTgPWAr8DgQiHZ9ETje+UBV6Hw/CeTF+rkG/g7YBGwA/gsIxOK5Bh4h+D1GF8Er+lv6O7cEu3p+Esq39QRHPQ1637pzV0QkzsRKV4+IiAyQgl9EJM4o+EVE4oyCX0Qkzij4RUTijIJfRCTOKPhFROKMgl9EJM78fzHRps6qLwueAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot(x, np.log(frequiences))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prob_1(word):\n",
    "    \"\"\"Return the unigram frequency of `word`.\n",
    "\n",
    "    The count stored for `word` in the module-level `words_count` is\n",
    "    divided by the number of entries in `TOKEN`.\n",
    "    \"\"\"\n",
    "    occurrences = words_count[word]\n",
    "    total_tokens = len(TOKEN)\n",
    "    return occurrences / total_tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0011559951084634823"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prob_1('我们')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['此外', '自', '本周', '6', '月', '12', '日起', '除', '小米', '手机']"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "TOKEN[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Coerce every token to `str` so that adjacent tokens can be joined as text.\n",
    "TOKEN = list(map(str, TOKEN))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A sequence of n tokens has n - 1 adjacent pairs, so the start index must run\n",
    "# up to len(TOKEN) - 2 inclusive; stopping one earlier drops the final bigram.\n",
    "TOKEN_2_GRAM = [''.join(TOKEN[i:i + 2]) for i in range(len(TOKEN) - 1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['此外自', '自本周', '本周6', '6月', '月12', '12日起', '日起除', '除小米', '小米手机', '手机6']"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "TOKEN_2_GRAM[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Occurrence count of every concatenated two-token string in TOKEN_2_GRAM.\n",
    "words_count_2 = Counter()\n",
    "words_count_2.update(TOKEN_2_GRAM)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prob_2(word1, word2):\n",
    "    \"\"\"Return the frequency of the bigram formed by `word1` followed by `word2`.\n",
    "\n",
    "    The two words are concatenated and looked up in `words_count_2`; the\n",
    "    count is divided by the number of entries in `TOKEN_2_GRAM`. A pair that\n",
    "    is absent from the table is scored as if it had been seen exactly once.\n",
    "    \"\"\"\n",
    "    bigram = word1 + word2\n",
    "    # Missing bigrams fall back to a count of 1 rather than 0.\n",
    "    occurrences = words_count_2.get(bigram, 1)\n",
    "    return occurrences / len(TOKEN_2_GRAM)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.106353056792606e-05"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prob_2('我们', '在')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5.537171224229244e-08"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prob_2('天','很蓝')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_probablity(sentence):\n",
    "    \"\"\"Score `sentence` as the product of its adjacent-word bigram scores.\n",
    "\n",
    "    The sentence is segmented with `cut`, each consecutive pair of words is\n",
    "    scored with `prob_2`, and the scores are multiplied from left to right.\n",
    "    When the segmentation has fewer than two words there are no pairs and\n",
    "    the result is 1.\n",
    "    \"\"\"\n",
    "    words = cut(sentence)\n",
    "    # Pair every word with its successor: (w0, w1), (w1, w2), ...\n",
    "    pair_scores = (prob_2(left, right) for left, right in zip(words, words[1:]))\n",
    "    return reduce(mul, pair_scores, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.987972028919815e-20"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_probablity('我们今天去游玩')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5.390405588817686e-58"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_probablity('天青色等烟雨，而我在等你')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sentence: 一个篮球看见一个好看的好看的桌子 with prob 1.4172740499101991e-53\n",
      "sentence: 一个好看的蓝色的女人看见这个小猫 with prob 4.1791546438988375e-51\n",
      "sentence: 这个小猫看见一个好看的篮球 with prob 3.7353622132963974e-40\n",
      "sentence: 这个蓝色的小小的蓝色的蓝色的小小的桌子听着一个好看的桌子 with prob 1.304276522044045e-95\n",
      "sentence: 这个桌子看着这个篮球 with prob 1.222067418098731e-28\n",
      "sentence: 这个蓝色的篮球看见这个好看的好看的女人 with prob 2.050143832855451e-64\n",
      "sentence: 一个小猫看着这个小小的好看的小猫 with prob 1.6637161740828435e-46\n",
      "sentence: 这个篮球听着这个蓝色的好看的女人 with prob 1.5748528429919893e-57\n",
      "sentence: 这个篮球坐在这个桌子 with prob 9.40051860075947e-30\n",
      "sentence: 一个小猫坐在一个小猫 with prob 5.640311160455683e-29\n"
     ]
    }
   ],
   "source": [
    "# Draw ten sentences from the grammar first, then print each with its score.\n",
    "sampled = [generate(gram=example_grammar, target='sentence') for i in range(10)]\n",
    "for sen in sampled:\n",
    "    print('sentence: {} with prob {}'.format(sen, get_probablity(sen)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "今天晚上请你吃大餐，我们一起吃日料 is more possible.\n",
      "---- 今天晚上请你吃大餐，我们一起吃日料 with probability 1.860800017996553e-66\n",
      "---- 明天晚上请你吃大餐，我们一起吃苹果 with probability 1.2405333453310353e-66\n",
      "真是一只好看的小猫 is more possible.\n",
      "---- 真事一只好看的小猫 with probability 4.6378582450260765e-34\n",
      "---- 真是一只好看的小猫 with probability 8.375862073276688e-27\n",
      "今晚我去吃火锅 is more possible.\n",
      "---- 今晚我去吃火锅 with probability 3.565193898142961e-20\n",
      "---- 今晚火锅去吃我 with probability 7.0503889505696015e-28\n",
      "养乐多绿来一杯 is more possible.\n",
      "---- 洋葱奶昔来一杯 with probability 1.6977113800680764e-22\n",
      "---- 养乐多绿来一杯 with probability 3.0660265166432384e-15\n"
     ]
    }
   ],
   "source": [
    "# Each entry holds two candidate sentences separated by a single space.\n",
    "need_compared = [\n",
    "    \"今天晚上请你吃大餐，我们一起吃日料 明天晚上请你吃大餐，我们一起吃苹果\",\n",
    "    \"真事一只好看的小猫 真是一只好看的小猫\",\n",
    "    \"今晚我去吃火锅 今晚火锅去吃我\",\n",
    "    \"洋葱奶昔来一杯 养乐多绿来一杯\"\n",
    "]\n",
    "\n",
    "for pair in need_compared:\n",
    "    first, second = pair.split()\n",
    "    scored = [(first, get_probablity(first)), (second, get_probablity(second))]\n",
    "    # The first candidate wins only on a strictly higher score; ties go to the second.\n",
    "    if scored[0][1] > scored[1][1]:\n",
    "        better = first\n",
    "    else:\n",
    "        better = second\n",
    "    print('{} is more possible.'.format(better))\n",
    "    for candidate, score in scored:\n",
    "        print('-'*4 + ' {} with probability {}'.format(candidate, score))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
